library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.7 ✔ dplyr 1.0.9
## ✔ tidyr 1.2.0 ✔ stringr 1.4.0
## ✔ readr 2.1.2 ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(lmerTest)
## Loading required package: lme4
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
##
## Attaching package: 'lmerTest'
## The following object is masked from 'package:lme4':
##
## lmer
## The following object is masked from 'package:stats':
##
## step
library(brolgar)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(ggplot2)
library(gapminder)
# Load the solver results from the working directory into a tibble.
data <- readr::read_csv(file = "solver.csv")
## Rows: 1614 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): file
## dbl (3): alpha, accuracy, time
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Strip the absolute directory prefix so `file` holds only the part of the
# path that follows `xd/`. The prefix is a literal string, so it is matched
# with `fixed = TRUE` instead of being interpreted as a regular expression,
# and the replacement is an explicit empty string rather than a backreference
# ("\\1") to a capture group that the pattern never defines.
data$file <- gsub(
  "/home/sgould/Desktop/projects/Crosswords-BERT/xd/",
  "",
  data$file,
  fixed = TRUE
)
# Scatter plot of accuracy against alpha for every row of `data`.
ggplot(data, mapping = aes(x = alpha, y = accuracy)) +
  theme_minimal() +
  labs(title = "Accuracy and alpha values") +
  # Jitter the points so that overlapping observations remain visible.
  geom_point(
    position = "jitter",
    size = 1.2,
    alpha = 0.8
  )

# Scatter plot of solve time against alpha for every row of `data`.
# The title names the variables actually plotted (time on y, alpha on x);
# it previously read "Accuracy and time values", copied from the plot above.
ggplot(
  data = data,
  aes(
    x = alpha,
    y = time
  )
) +
  geom_point(
    size = 1.2,
    alpha = 0.8,
    position = "jitter" # add some random noise so overlapping points show
  ) +
  theme_minimal() +
  labs(title = "Time and alpha values")

# Keep only the leading rows of `data` so the per-file lines stay readable.
# NOTE(review): the name says 100 but 256 rows are taken -- confirm which
# one is intended.
data_100 <- head(data, n = 256)

# One line (with point markers) per file: accuracy as a function of alpha.
ggplot(
  data_100,
  mapping = aes(x = alpha, y = accuracy, color = factor(file))
) +
  theme_bw() +
  geom_line() +
  geom_point()

# Bubble chart over the full data set: time on x, alpha on y, point size
# scaled by accuracy and colour keyed by file.
p <- ggplot(
  data,
  aes(x = time, y = alpha, size = accuracy, color = file)
) +
  theme_bw() +
  geom_point()

# Render the static ggplot as an interactive plotly widget.
plotly::ggplotly(p)
# Linear mixed model: accuracy explained by a fixed effect of alpha plus a
# random intercept for each file. The lmerTest version of lmer() is the one
# in scope (it masks lme4::lmer), so it is named explicitly here.
lmm <- lmerTest::lmer(
  formula = accuracy ~ alpha + (1 | file),
  data = data
)

# Print the fitted model summary (random and fixed effects).
summary(lmm)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: accuracy ~ alpha + (1 | file)
## Data: data
##
## REML criterion at convergence: -7854.6
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.9514 -0.5051 -0.0171 0.4577 6.3294
##
## Random effects:
## Groups Name Variance Std.Dev.
## file (Intercept) 0.0125899 0.11220
## Residual 0.0003071 0.01753
## Number of obs: 1614, groups: file, 92
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 8.956e-01 1.175e-02 9.218e+01 76.24 <2e-16 ***
## alpha -3.178e-02 1.653e-03 1.521e+03 -19.23 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## alpha -0.082
# Print the ANOVA table for the fitted mixed model `lmm`.
anova(lmm)
# Auto-print the data set (with the `file` column as modified above).
data